1 Introduction

In the previous notebook, we filtered and normalized the expression matrix. In this notebook, we aim to annotate all cells to their corresponding cell type. Furthermore, we will try to identify cycling cells.

2 Pre-processing

2.1 Package loading

library(scater)
library(scran)
library(Seurat)
library(ggpubr)
library(kBET)
library(cluster)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(viridis)
library(tidyverse)

2.2 Source script with functions

source("bin/utils.R")

2.3 Load data

t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_filtered_normalized.rds")

Note that this analysis was done with the first 2 donors (replicates). We will analyse the remaining two (rep2) in a subsequent notebook:

t_act_l <- t_act_l[c("day_0_rep1", "day_2_rep1")]
names(t_act_l) <- c("day_0", "day_2")

3 Find Variable Genes

To cluster our cells, we need to overcome 2 challenges:

  1. The ‘curse of dimensionality’: as each cell can be conceived as a vector with >10,000 genes, and as two random cells will have the very similar expression for most genes, the distance measured between any given pair of cells will be very low, thus being unreliable for proper comparisons.
  2. Computational complexity: as the data is highly dimensional, even the most greedy algorithm will take long to complete.
  3. Most genes should not be differentially expressed between cells, so the observed differences in such genes will be due to technical issues or transient biological states, that may confound the true structure in the dataset.

Thus, we aim to eliminate redundancy and denoise the dataset. Hence, we will find the subset of genes that drive most of the variability in the expression matrix (feature selection). We are going to analyze each day and donor separately, as we need as much resolution as possible to detect the markers.

t_act_0 <- SplitObject(t_act_l$day_0, split.by = "donor")
t_act_2 <- SplitObject(t_act_l$day_2, split.by = "donor")
t_act_l <- list(t_act_0$Male, t_act_0$Female, t_act_2$Male, t_act_2$Female)
names(t_act_l) <- c("0M", "0F", "2M", "2F")
t_act_l <- purrr::map(t_act_l, FindVariableFeatures)
var_plots <- purrr::map(t_act_l, VariableFeaturePlot)
var_plots <- purrr::map2(t_act_l, var_plots, function(seurat, p) {
  LabelPoints(
  plot = p, 
  points =  head(VariableFeatures(seurat), 10), 
  repel = TRUE
  )
})
var_plots
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

As we can see, we reduce the number of dimensions from >10,000 genes to ~2000 highly variable genes (HVG). We observe that among the top HVG there are well-known PBMC markers, such as LYZ, S100A8 (monocytes), GNLY and NKG7 (natural killer and CD8+ T cells). Moreover, we see that IFNG is amongst the most variable genes in day 2, which is consistent with the fact that the T cells in this dataset were activated with anti-CD3 antibodies.

4 Scale data

An important pre-processing step in any cluster analysis is to scale the data, as otherwise variables with a higher mean will have a higher weight in the distance metric:

t_act_l <- purrr::map(t_act_l, ScaleData)

5 Linear dimensionality reduction (PCA)

An additional challenge in our cluster analysis is that scRNAs-seq is very noisy (very susceptible to technical artifacts), and very sparse (contains drop-outs). Thus, differences in single genes may not be accurate to identify cell types. To that end, we can perform PCA, as each PC can be conceived as a ‘metagene’ that includes information across a correlated gene set. Furthermore, we will reduce the dimensionality even more:

t_act_l <- purrr::map(t_act_l, RunPCA)
purrr::map(t_act_l, VizDimLoadings, dims = 1:3, reduction = "pca")
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

purrr::map(t_act_l, DimPlot, reduction = "pca")
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

6 Cluster cells

To cluster cells we will used the Seurat’s built-in functions FindNeighbors and FindClusters, which use the graph-based Louvain algorithm. The critical parameter for these functions is the resolution, which will determine the final number of clusters. To decide it, we will compute the silhouette width for varying resolutions, and choose the one that maximizes it.

t_act_l <- purrr::map(t_act_l, FindNeighbors, dims = 1:20)
resolutions <- c(
  0.005,
  seq(0.01, 0.1, by = 0.01),
  seq(0.1, 1, by = 0.1), seq(1, 10, by = 1)
)
avgs_sil_width_dfs <- purrr::map2(t_act_l, names(t_act_l), function(seurat, condition) {
  print(condition)
  avgs_sil_width_df <- data.frame(num_k = c(), sil_width = c(), resolution = c())
  num_k <- 1
  for (res in resolutions) {
    print(str_c("Current resolution is ", res))
    seurat <-  FindClusters(seurat, resolution = res, verbose = FALSE)
    curr_num_k <- length(levels(seurat$seurat_clusters))
    if (curr_num_k == num_k) {
      next
    } else {
      sil_width <- calc_sil_width(
        object = seurat, 
        clusters = seurat$seurat_clusters, 
        npcs = 5, 
        ncell = 2500
      )
      print(str_c("Current silhouette width is ", sil_width))
      curr_df <- data.frame(num_k = curr_num_k, sil_width = sil_width, resolution = res)
      avgs_sil_width_df <- rbind(avgs_sil_width_df, curr_df)
      num_k <- curr_num_k 
      print(str_c("Current number of clusters is ", num_k))
    }
  }
  avgs_sil_width_df
})
## [1] "0M"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.00990317498698955"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current silhouette width is -0.0705609609545215"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.07"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current silhouette width is -0.0926675502184887"
## [1] "Current number of clusters is 6"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.106183361595093"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0560642220878356"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.4"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.0607314587528802"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current silhouette width is -0.0788722605822153"
## [1] "Current number of clusters is 14"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.13573366505029"
## [1] "Current number of clusters is 24"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.126700752962485"
## [1] "Current number of clusters is 28"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.147970248428551"
## [1] "Current number of clusters is 33"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.270915774589304"
## [1] "Current number of clusters is 41"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.212956421522404"
## [1] "Current number of clusters is 46"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.308938565096131"
## [1] "Current number of clusters is 55"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.372686532944749"
## [1] "Current number of clusters is 61"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.312004197758309"
## [1] "Current number of clusters is 65"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.366741757046182"
## [1] "Current number of clusters is 67"
## [1] "0F"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.198368446740192"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current resolution is 0.07"
## [1] "Current silhouette width is -0.250966030668032"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.08"
## [1] "Current silhouette width is -0.231349839779394"
## [1] "Current number of clusters is 6"
## [1] "Current resolution is 0.09"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.225690886650997"
## [1] "Current number of clusters is 8"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0805477795435761"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.4"
## [1] "Current silhouette width is -0.245845298727118"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.209231139505976"
## [1] "Current number of clusters is 13"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0944739055016626"
## [1] "Current number of clusters is 15"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.116676659786663"
## [1] "Current number of clusters is 22"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.125209706717626"
## [1] "Current number of clusters is 28"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.251949052251048"
## [1] "Current number of clusters is 34"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.192768769309449"
## [1] "Current number of clusters is 40"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.347549239779038"
## [1] "Current number of clusters is 45"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.28004542953883"
## [1] "Current number of clusters is 53"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.222344288810828"
## [1] "Current number of clusters is 58"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.295608672860346"
## [1] "Current number of clusters is 64"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.330103497513141"
## [1] "Current number of clusters is 69"
## [1] "2M"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.00649730398099191"
## [1] "Current number of clusters is 2"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current silhouette width is -0.010702258369204"
## [1] "Current number of clusters is 3"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current resolution is 0.07"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current silhouette width is -0.0172514257714861"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0182525510068341"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.4"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.0319931395710725"
## [1] "Current number of clusters is 7"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0302992944167953"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.8"
## [1] "Current silhouette width is -0.0330197371340559"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.9"
## [1] "Current silhouette width is -0.0459159879805063"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 1"
## [1] "Current silhouette width is -0.0793688966633591"
## [1] "Current number of clusters is 12"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.0685155550174229"
## [1] "Current number of clusters is 19"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.102223660052064"
## [1] "Current number of clusters is 24"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.103833034157114"
## [1] "Current number of clusters is 31"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.131616296340424"
## [1] "Current number of clusters is 37"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.115185684494556"
## [1] "Current number of clusters is 41"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.162701225248331"
## [1] "Current number of clusters is 44"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.149213578032777"
## [1] "Current number of clusters is 47"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.128743489150844"
## [1] "Current number of clusters is 51"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.176211926577168"
## [1] "Current number of clusters is 57"
## [1] "2F"
## [1] "Current resolution is 0.005"
## [1] "Current resolution is 0.01"
## [1] "Current silhouette width is -0.0314677648448414"
## [1] "Current number of clusters is 2"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current silhouette width is -0.026454858473299"
## [1] "Current number of clusters is 3"
## [1] "Current resolution is 0.07"
## [1] "Current silhouette width is -0.0571885733605773"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.0216034933152717"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.3"
## [1] "Current resolution is 0.4"
## [1] "Current silhouette width is -0.0249616244456837"
## [1] "Current number of clusters is 7"
## [1] "Current resolution is 0.5"
## [1] "Current resolution is 0.6"
## [1] "Current silhouette width is -0.0310750357643001"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0322967352797902"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.0568674780657195"
## [1] "Current number of clusters is 14"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.066423304195434"
## [1] "Current number of clusters is 21"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.130084136666141"
## [1] "Current number of clusters is 27"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.0949918438495278"
## [1] "Current number of clusters is 31"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.129340508084619"
## [1] "Current number of clusters is 35"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.162644225840953"
## [1] "Current number of clusters is 40"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.156170082988737"
## [1] "Current number of clusters is 47"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.156943365462931"
## [1] "Current number of clusters is 52"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.246543682845399"
## [1] "Current number of clusters is 54"
resolution_plots <- purrr::map2(avgs_sil_width_dfs, names(avgs_sil_width_dfs), function(df, cond) {
  ggplot(df, aes(num_k, sil_width, label = num_k)) +
    geom_text() +
    labs(title = cond,
         x = "Number of clusters (k)", 
         y = "Average Silhouette Width") +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5))
})
resolution_plots
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

We aim to determine a number of clusters that maximizes silhouette width while preventing overstratification. Judgding by the previous plots, we define the following k:

  • Day 0 - Male: 6
  • Day 0 - Female: 6
  • Day 2 - Male: 4
  • Day 2 - Female: 3
optimal_k <- c(6, 6, 4, 3)
t_act_l <- purrr::pmap(list(t_act_l, avgs_sil_width_dfs, optimal_k), function(seurat, df, opt_k) {
  opt_resolution <- df[df$num_k == opt_k, "resolution"]
  seurat <- FindClusters(object = seurat, resolution = opt_resolution)
  seurat
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6092
## Number of edges: 245369
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9629
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6751
## Number of edges: 271189
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9625
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3218
## Number of edges: 123564
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9486
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2705
## Number of edges: 108671
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9500
## Number of communities: 3
## Elapsed time: 0 seconds
purrr::map(t_act_l, ~ length(levels(.x$seurat_clusters)))
## $`0M`
## [1] 6
## 
## $`0F`
## [1] 6
## 
## $`2M`
## [1] 4
## 
## $`2F`
## [1] 3

7 Non-linear dimensionality reduction

We can visualize the former clusters with a t-Stochastic Neighbor Embedding (tSNE) and uniform manifold approximation and projection (UMAP), which allow to depict more structure in the data than PCA:

t_act_l <- purrr::map(t_act_l, function(seurat) {
  seurat@reductions$PCA <- NULL
  seurat@reductions$TSNE <- NULL
  seurat@reductions$UMAP <- NULL
  seurat
})
t_act_l <- purrr::map(t_act_l, RunTSNE, reduction = "pca", dims = 1:20)
t_act_l <- purrr::map(t_act_l, RunUMAP, reduction = "pca", dims = 1:20)
purrr::map(t_act_l, DimPlot, reduction = "tsne")
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

purrr::map(t_act_l, DimPlot, reduction = "umap")
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_pre_processed.rds")
# t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_pre_processed.rds")

8 Find cluster markers

Let us find the markers for each of the clusters. That is, the genes that are exclusively expressed in one cluster:

markers_l <- purrr::map(t_act_l, FindAllMarkers, only.pos = TRUE)
DT::datatable(markers_l$`0M`)
DT::datatable(markers_l$`0F`)
DT::datatable(markers_l$`2M`)
DT::datatable(markers_l$`2F`)
# saveRDS(markers_l, file = "results/R_objects/t_act_markers_list.rds")
marker_selection <- purrr::map2(t_act_l, markers_l, function(seurat, df) {
  num_k <- length(unique(seurat$seurat_clusters))
  selection <- unlist(purrr::map(0:(num_k-1), ~df[df$cluster == .x, "gene"][0:8]))
  selection
})
heatmaps_markers <- purrr::map2(t_act_l, marker_selection, ~DoHeatmap(.x, features = .y) + NoLegend())
heatmaps_markers
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

9 Cell cycle scoring

After inspecting the markers, there are still some clusters of unknown identity. Let us shed more light by detecting the cycling cells in our datasets using cell cycle signatures:

t_act_l <- purrr::map(t_act_l, function(seurat) {
  s_genes <- cc.genes$s.genes[cc.genes$s.genes %in% rownames(seurat@assays$RNA@scale.data)]
  g2m_genes <- cc.genes$g2m.genes[cc.genes$g2m.genes %in% rownames(seurat@assays$RNA@scale.data)]
  seurat <- CellCycleScoring(object = seurat, s.features = s_genes, g2m.features = g2m_genes)
  seurat
})
cycling_gg <- purrr::map(t_act_l, FeaturePlot, features = c("S.Score", "G2M.Score"), cols = viridis(20))
cycling_gg
## $`0M`

## 
## $`0F`

## 
## $`2M`

## 
## $`2F`

10 Annotation

With the previous results, we can proceed to annotate the cells to its corresponding cell type

10.1 Day 0 - Male donor

Cluster ID Cell type
0 CD4 T
1 Monocytes
2 Cytotoxic
3 B
4 FCGR3A Monocytes
5 Dendritic Cells
FeaturePlot(t_act_l$`0M`, features = "IL7R")

t_act_l$`0M`$cell_type <- case_when(
  t_act_l$`0M`$seurat_clusters == "0" ~ "CD4 T",
  t_act_l$`0M`$seurat_clusters == "1" ~ "Monocyte",
  t_act_l$`0M`$seurat_clusters == "2" ~ "Cytotoxic",
  t_act_l$`0M`$seurat_clusters == "3" ~ "B",
  t_act_l$`0M`$seurat_clusters == "4" ~ "FCGR3A Monocyte",
  t_act_l$`0M`$seurat_clusters == "5" ~ "Dendritic Cell"
)
Idents(t_act_l$`0M`) <- "cell_type"
DimPlot(t_act_l$`0M`, reduction = "umap", label = TRUE) + NoLegend()

10.2 Day 0 - Female donor

Cluster ID Cell type
0 CD4 T
1 Cytotoxic
2 Monocyte
3 B
4 Monocyte
5 Unknown
FeaturePlot(t_act_l$`0F`, features = "IL7R")

t_act_l$`0F`$cell_type <- case_when(
  t_act_l$`0F`$seurat_clusters == "0" ~ "CD4 T",
  t_act_l$`0F`$seurat_clusters == "1" ~ "Cytotoxic",
  t_act_l$`0F`$seurat_clusters == "2" ~ "Monocyte",
  t_act_l$`0F`$seurat_clusters == "3" ~ "B",
  t_act_l$`0F`$seurat_clusters == "4" ~ "Monocyte",
  t_act_l$`0F`$seurat_clusters == "5" ~ "Unknown"
)
Idents(t_act_l$`0F`) <- "cell_type"
DimPlot(t_act_l$`0F`, reduction = "umap", label = TRUE) + NoLegend()

10.3 Day 2 - Male donor

Cluster ID Cell type
0 Cycling
1 CD4 T
2 Cytotoxic
3 B
t_act_l$`2M`$cell_type <- case_when(
  t_act_l$`2M`$seurat_clusters == "0" ~ "Cycling",
  t_act_l$`2M`$seurat_clusters == "1" ~ "CD4 T",
  t_act_l$`2M`$seurat_clusters == "2" ~ "Cytotoxic",
  t_act_l$`2M`$seurat_clusters == "3" ~ "B"
)
Idents(t_act_l$`2M`) <- "cell_type"
DimPlot(t_act_l$`2M`, reduction = "umap", label = TRUE) + NoLegend()

10.4 Day 2 - Female donor

Cluster ID Cell type
0 T
1 Cycling
2 B
t_act_l$`2F`$cell_type <- case_when(
  t_act_l$`2F`$seurat_clusters == "0" ~ "T",
  t_act_l$`2F`$seurat_clusters == "1" ~ "Cycling",
  t_act_l$`2F`$seurat_clusters == "2" ~ "B"
)
Idents(t_act_l$`2F`) <- "cell_type"
DimPlot(t_act_l$`2F`, reduction = "umap", label = TRUE) + NoLegend()

11 Save

# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_annotated")
# t_act <- SplitObject(t_act, split.by = "donor")
# 
# # Male
#   t_act_m <- pre_process_seurat(t_act$Male)
#   Idents(t_act_m) <- "day"
#   umap_day <- DimPlot(t_act_m, reduction = "umap", pt.size = 0.9)
#   Idents(t_act_m) <- "time"
#   umap_time <- DimPlot(t_act_m, reduction = "umap", pt.size = 0.9)
#   ggarrange(plotlist = list(umap_day, umap_time))
#   monocyte_male <- FeaturePlot(t_act_m, features = "LYZ", reduction = "umap", pt.size = 0.9)
# 
# # Female
# t_act_f <- pre_process_seurat(t_act$Female)
# Idents(t_act_f) <- "day"
# umap_day <- DimPlot(t_act_f, reduction = "umap", pt.size = 0.9)
# Idents(t_act_f) <- "time"
# umap_time <- DimPlot(t_act_f, reduction = "umap", pt.size = 0.9)
# ggarrange(plotlist = list(umap_day, umap_time))
# monocyte_female <- FeaturePlot(t_act_f, features = "LYZ", reduction = "umap", pt.size = 0.9)
# monocyte_female
# 
# 
# # score with stimulated-T cells signature
# t_act_f <- AddModuleScore(t_act_f, features = list(t_stimuli_sign), name = "stimulated_t_score")
# FeaturePlot(t_act_f, features = "stimulated_t_score1", reduction = "umap", pt.size = 0.9)
# 
# FeaturePlot(t_act_f, features = "TRAC", reduction = "umap", pt.size = 0.9)

EXPLORATORY2:

# Split by day and donor: Day 0 - Male Donor
# seurat_l <- SplitObject(t_act$day_0, split.by = "donor")
# seurat <- seurat_l$Male
# seurat <- pre_process_seurat(seurat)
# seurat <- FindVariableFeatures(seurat)
# seurat <- ScaleData(seurat)
# seurat <- RunPCA(seurat)
# seurat <- RunUMAP(seurat, dims = 1:15, reduction = "pca")
# seurat@reductions$UMAP <- NULL
# Idents(seurat) <- "time"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# seurat <- FindNeighbors(seurat)
# seurat <- FindClusters(seurat, dims = 1:20, reduction = "pca", resolution = 0.1)
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers <- FindAllMarkers(seurat)
# marker_selection <- unlist(map(0:5, ~markers[markers$cluster == ., "gene"][0:5]))
# marker_selection
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "LYZ", "MS4A1", "FCGR3A"), reduction = "umap")
# 
# 
# # Split by day and donor: Day 2 - Male Donor
# seurat_l <- SplitObject(t_act$day_2, split.by = "donor")
# seurat <- seurat_l$Male
# seurat <- pre_process_seurat(seurat)
# seurat@reductions$UMAP <- NULL
# Idents(seurat) <- "time"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# seurat <- FindNeighbors(seurat)
# seurat <- FindClusters(seurat, dims = 1:20, reduction = "pca", resolution = 0.1)
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers <- FindAllMarkers(seurat)
# marker_selection <- unlist(map(0:4, ~markers[markers$cluster == ., "gene"][0:10]))
# marker_selection
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "MS4A1"), reduction = "umap")
# 
# seurat <- subset(seurat, subset = seurat_clusters != "2")
# seurat <- CellCycleScoring(seurat, s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes)
# FeaturePlot(seurat, features = c("S.Score", "G2M.Score"))
# seurat <- pre_process_seurat(seurat, vars_to_regress = c("S.Score", "G2M.Score"))
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers2 <- FindAllMarkers(seurat)
# marker_selection2 <- unlist(map(c(0,1,3,4), ~markers2[markers2$cluster == ., "gene"][0:8]))
# marker_selection2
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "MS4A1", "IFNG"), reduction = "umap")
# Idents(seurat) <- "seurat_clusters"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# 
# b <- subset(seurat, idents = "4")
# b <- pre_process_seurat(b)
# Idents(b) <- "time"
# DimPlot(b, reduction = "umap", pt.size = 0.8)
# b <- FindNeighbors(b)
# b <- FindClusters(b, resolution = 0.2)
# DimPlot(b, reduction = "umap", pt.size = 0.8)
# markers_b <- FindAllMarkers(b)